Algo Trading Project - Hugo Roccaro
1. Prerequisites¶
1.1 Libraries¶
import os
import warnings
import numpy as np
import pandas as pd
# For PCA
import statsmodels.api as sm
from statsmodels.multivariate import pca
from ppca import PPCA
# For portfolio optimization
from scipy.optimize import minimize
import cvxpy as cp
# For plotting and formatting the plots
import matplotlib.pyplot as plt
import matplotlib.ticker as mtkr
import matplotlib.dates as mdts
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.express as px
warnings.filterwarnings("ignore")
plotly.offline.init_notebook_mode()
1.2 Path¶
os.chdir("/Users/hroccaro/Desktop/Algo trading")
1.3 Useful Functions¶
# Function to make a time series plot on one axis
def plot_timeseries_one_axis(data, y_label=None):
"""
Plot a time series graph using a single axis.
Args
----
data : pandas DataFrame
A DataFrame with the time series data to be plotted. Dates should be set as the index.
y_label : str, optional
The label for the y-axis. If None, no label is set. Default is None.
Returns
-------
fig : matplotlib.figure.Figure
The `Figure` object which acts as the container for the plot. It can be used for further customizations or saving the plot.
ax : matplotlib.axes.Axes
The `Axes` object representing the plot of the time series. It provides access to axes-level functions for further customization.
"""
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.grid(visible=True, linestyle='dashed', lw=0.35, color='lightgray')
data.plot(ax=ax, lw=1.)
if y_label is not None:
ax.set_ylabel(ylabel=y_label)
ax.xaxis.set_major_formatter(mdts.DateFormatter('%b-%y'))
ax.legend(loc='best', framealpha=0)
return fig, ax
# Function to make a correlation heatmap
def plot_heatmap(data_df, ax=None, cbar_kw=None, cbarlabel="Correlation", annotate=True, **kwargs):
"""
Create and annotate a heatmap from a pandas DataFrame.
Args
----
data_df : pandas DataFrame
A DataFrame with the data to be plotted. Row and column labels are taken from the DataFrame.
ax : matplotlib.axes.Axes, optional
A `matplotlib.axes.Axes` instance to which the heatmap is plotted.
cbar_kw : dict, optional
A dictionary with arguments to `matplotlib.Figure.colorbar`.
cbarlabel : str, optional
The label for the colorbar.
annotate : bool, optional
Flag to annotate the heatmap. Default is True.
**kwargs
All other arguments are forwarded to `imshow`.
Returns
-------
im : matplotlib.image.AxesImage
The `AxesImage` object created by `imshow` representing the heatmap. It can be used for further customizations if needed.
cbar : matplotlib.colorbar.Colorbar
The `Colorbar` object for the heatmap. It represents the color scale of the heatmap and is attached to the provided axes. This can also be used for further customizations, like changing the colorbar properties.
"""
if ax is None:
ax = plt.gca()
if cbar_kw is None:
cbar_kw = {}
# Extract data and labels from the DataFrame
data = data_df.values
row_labels = data_df.index
col_labels = data_df.columns
# Plot the heatmap
im = ax.imshow(data, **kwargs)
# Create colorbar
cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw, shrink=0.8, format=mtkr.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))
cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
# Show all ticks and label them with the respective list entries.
ax.set_xticks(np.arange(data.shape[1]), labels=col_labels)
ax.set_yticks(np.arange(data.shape[0]), labels=row_labels)
# Let the horizontal axes labeling appear on top.
ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=-30, ha="right", rotation_mode="anchor")
# Turn spines off and create white grid.
ax.spines[:].set_visible(False)
ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)
ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)
ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
ax.tick_params(which="minor", bottom=False, left=False)
# Annotate the heatmap
if annotate:
# Normalizing the threshold
threshold = im.norm(data.max())/2.
# Setting up the format for annotations
valfmt = mtkr.FuncFormatter(lambda x, _: '{:.0%}'.format(x))
# Loop over the data and create a `Text` for each "pixel".
textcolors = ("black", "white")
for i in range(data.shape[0]):
for j in range(data.shape[1]):
color = textcolors[int(im.norm(data[i, j]) > threshold)]
im.axes.text(j, i, valfmt(data[i, j], None),
ha="center", va="center", color=color)
return im, cbar
# Function for computing portfolio return
def portfolio_returns(daily_returns, weights):
"""
Calculate the annualized return of a portfolio based on daily returns and portfolio weights.
Args
----
daily_returns : pandas DataFrame or array_like
The daily returns of the assets in the portfolio. Each column should represent an asset.
weights : array_like
Weights of the assets in the portfolio. The sum of all weights should be 1.
Returns
-------
returns : float
The annualized return of the portfolio. This is computed as the sum of the mean of daily returns
of each asset, weighted by the portfolio weights, and then annualized (typically using 253 trading days).
"""
returns = (np.sum(daily_returns.mean() * weights)) * 253
return returns
# Function for computing standard deviation of portfolio returns
def portfolio_sd(daily_returns, weights):
"""
Calculate the standard deviation of annualized returns of a portfolio, representing the portfolio risk.
Args
----
daily_returns : pandas DataFrame or array_like
The daily returns of the assets in the portfolio. Each column should represent an asset.
weights : array_like
Weights of the assets in the portfolio. The sum of all weights should be 1.
Returns
-------
sd : float
The standard deviation of the portfolio's annualized returns, which represents the portfolio risk.
Calculated using the covariance matrix of the daily returns, weighted by the portfolio weights,
and annualized (typically using 253 trading days).
"""
sd = np.sqrt(np.dot(np.transpose(weights), np.dot(daily_returns.cov() * 253, weights)))
return sd
def optimize_long_only_min_variance_portfolio(returns):
"""
Optimize a long-only minimum variance portfolio over time, with constraints on allocation per factor and trade limits.
Args
----
returns : pandas DataFrame
A DataFrame containing the returns of different factors. Each column represents a factor, and each row represents a time period (e.g., a month).
Returns
-------
optimized_weights : pandas DataFrame
A DataFrame containing the optimized weights for each factor in each time period. Each column represents a factor, and each row represents the optimized weights for that factor at a specific time period.
Notes
-----
The optimization is conducted sequentially for each time period, starting with an equal distribution among factors.
Constraints are applied to ensure the total allocation sums to 1, no factor exceeds a 0.25 allocation, and the portfolio remains long-only.
Additional constraints limit the total trading movement and the movement per factor between consecutive time periods.
"""
nb_factors = returns.shape[1]
nb_months = returns.shape[0]
# Initialize a DataFrame to store the optimized portfolio weights
optimized_weights = pd.DataFrame(0.0, index=returns.index, columns=returns.columns, dtype=float)
# Starting with an equal distribution among factors
initial_weights = np.array([1.0 / nb_factors] * nb_factors)
optimized_weights.iloc[0] = initial_weights
for i in range(1, nb_months):
# Calculate the covariance matrix for the current month
covariance_matrix = np.cov(returns.iloc[:i].values, rowvar=False)
# Symmetrize and regularize the covariance matrix
covariance_matrix = (covariance_matrix + covariance_matrix.T) / 2 + 1e-8 * np.eye(nb_factors)
# Define the optimization problem
weights = cp.Variable(nb_factors)
portfolio_variance = cp.quad_form(weights, covariance_matrix)
objective = cp.Minimize(portfolio_variance)
constraints = [
cp.sum(weights) == 1, # Total allocation must be 1
cp.max(weights) <= 0.25, # No factor can have more than 0.25 allocation
cp.min(weights) >= 0, # Long only portfolio
cp.norm(weights - optimized_weights.iloc[i-1].values, 1) <= 0.15, # Total trading not more than 0.15
cp.abs(weights - optimized_weights.iloc[i-1].values) <= 0.05 # Cannot trade more than 0.05 per factor
]
# Solve the problem
prob = cp.Problem(objective, constraints)
prob.solve()
# Store the optimized weights
optimized_weights.iloc[i] = weights.value
return optimized_weights
def optimize_long_short_min_variance_portfolio(returns):
"""
Optimize a long-short minimum variance portfolio over time, with constraints on allocation per factor and trade limits.
Args
----
returns : pandas DataFrame
A DataFrame containing the returns of different factors. Each column represents a factor, and each row represents a time period (e.g., a month).
Returns
-------
optimized_weights : pandas DataFrame
A DataFrame containing the optimized weights for each factor in each time period. Each column represents a factor, and each row represents the optimized weights for that factor at a specific time period.
"""
nb_factors = returns.shape[1]
nb_months = returns.shape[0]
# Initialize a DataFrame to store the optimized portfolio weights
optimized_weights = pd.DataFrame(0.0, index=returns.index, columns=returns.columns, dtype=float)
# Starting with an equal distribution among factors
initial_weights = np.array([1.0 / nb_factors] * nb_factors)
optimized_weights.iloc[0] = initial_weights
for i in range(1, nb_months):
# Calculate the covariance matrix for the current month
covariance_matrix = np.cov(returns.iloc[:i].values, rowvar=False)
# Symmetrize and regularize the covariance matrix
covariance_matrix = (covariance_matrix + covariance_matrix.T) / 2 + 1e-8 * np.eye(nb_factors)
# Define the optimization problem
weights = cp.Variable(nb_factors)
abs_weights = cp.Variable(nb_factors, nonneg=True)
portfolio_variance = cp.quad_form(weights, covariance_matrix)
objective = cp.Minimize(portfolio_variance)
constraints = [
cp.sum(abs_weights) == 1,
cp.max(weights) <= 0.25, # No factor can have more than 0.25 allocation
cp.norm(weights - optimized_weights.iloc[i-1].values, 1) <= 0.15, # Total trading not more than 0.15
cp.abs(weights - optimized_weights.iloc[i-1].values) <= 0.05 # Cannot trade more than 0.05 per factor
]
# Solve the problem
prob = cp.Problem(objective, constraints)
prob.solve(solver=cp.SCS)
# Store the optimized weights
optimized_weights.iloc[i] = weights.value
return optimized_weights
2. PCA¶
2.1 Data Preprocessing¶
# Read Data
change_freq = '4w'
data_pca = pd.read_csv(filepath_or_buffer='_'.join(['Multi-asset PCA', 'CHANGES=' + change_freq]) + '.csv',
index_col=0, header=0,
parse_dates=True).astype(np.float64)
n_assets = data_pca.shape[1]
# Show the dateframe
data_pca
| MSCI World | S&P 500 | Euro Stoxx 50 | Euro Stoxx Banks | CDX.IG 5Y | CDX.HY 5Y | iTraxx Europe 5Y | iTraxx Crossover 5Y | Italy - Germany 10Y | US 2Y | ... | USD 3M x 10Y ATM vol. | USD 1Y x 10Y ATM vol. | USD 5Y x 5Y ATM vol. | USD 10Y x 20Y ATM vol. | US Govt. | European Govt. | US Corp. Bonds | European Corp. Bonds | US High Yield | European High Yield | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2005-06-10 | 0.012532 | 0.022793 | 0.056718 | 0.047424 | 0.006602 | 0.026766 | 0.006048 | 0.035003 | 4.9 | 1.65 | ... | 0.00 | -0.400 | 2.65 | -0.15 | 0.010386 | 0.017674 | 0.013421 | 0.019308 | 0.029171 | 0.029893 |
| 2005-07-06 | 0.002255 | -0.002649 | 0.025209 | 0.017424 | -0.000800 | 0.006752 | 0.001608 | 0.012723 | 0.9 | 7.43 | ... | 0.95 | 0.950 | -2.10 | -1.80 | 0.001192 | -0.004078 | 0.002829 | -0.002100 | 0.009255 | 0.009683 |
| 2005-08-03 | 0.046502 | 0.041080 | 0.036400 | 0.041141 | 0.003598 | 0.018337 | 0.001442 | 0.003727 | -2.9 | 24.17 | ... | 0.15 | -1.150 | -2.55 | -2.20 | -0.008140 | -0.005336 | -0.004783 | -0.005385 | 0.015161 | 0.014480 |
| 2005-08-31 | -0.008268 | -0.020054 | -0.024171 | -0.019341 | 0.001110 | -0.003190 | 0.000003 | 0.002291 | 1.6 | -19.54 | ... | -2.95 | 0.600 | 1.95 | 2.75 | 0.016193 | 0.015940 | 0.016234 | 0.016116 | 0.003449 | 0.007675 |
| 2005-09-27 | 0.010540 | -0.003834 | 0.036243 | 0.042163 | 0.000609 | -0.001547 | 0.001839 | 0.008034 | -0.1 | 25.54 | ... | -0.05 | -0.750 | -0.70 | 0.35 | -0.011720 | 0.000517 | -0.014359 | -0.000039 | -0.009020 | 0.001002 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-11-30 | 0.084752 | 0.081787 | 0.090406 | 0.073881 | 0.007581 | 0.030603 | 0.009734 | 0.036006 | -20.9 | -30.95 | ... | -14.10 | -4.654 | -2.90 | -2.30 | 0.026953 | 0.022098 | 0.047535 | 0.025662 | 0.021258 | 0.032360 |
| 2022-12-28 | -0.055047 | -0.075548 | -0.040116 | 0.012875 | -0.003171 | -0.014656 | 0.000119 | -0.004665 | 17.9 | 4.10 | ... | 4.60 | 3.006 | 2.50 | 2.00 | -0.006943 | -0.045961 | -0.006454 | -0.019763 | -0.006197 | -0.008446 |
| 2023-01-25 | 0.068134 | 0.059766 | 0.085333 | 0.101296 | 0.006157 | 0.028027 | 0.006219 | 0.024940 | -33.4 | -22.62 | ... | -17.90 | -21.406 | -14.50 | -5.50 | 0.028097 | 0.030792 | 0.041032 | 0.027126 | 0.035909 | 0.029603 |
| 2023-02-22 | -0.009421 | -0.006287 | 0.022589 | 0.063275 | -0.001573 | -0.007013 | 0.000186 | 0.003366 | 15.3 | 56.83 | ... | 2.40 | 3.907 | 4.30 | 3.40 | -0.025541 | -0.026568 | -0.031878 | -0.012932 | -0.019851 | 0.002439 |
| 2023-03-22 | -0.016522 | -0.013643 | -0.011182 | -0.096528 | -0.000676 | -0.013304 | -0.000156 | -0.002206 | -9.9 | -75.66 | ... | 9.20 | 1.447 | 2.70 | -1.10 | 0.029796 | 0.014531 | 0.023493 | -0.000127 | 0.005266 | -0.009522 |
232 rows × 40 columns
# Normalise the data to have zero mean and standard deviation 1; show the normalised data
data_pca_normalised = (data_pca - data_pca.mean()) / data_pca.std()
data_pca_normalised
| MSCI World | S&P 500 | Euro Stoxx 50 | Euro Stoxx Banks | CDX.IG 5Y | CDX.HY 5Y | iTraxx Europe 5Y | iTraxx Crossover 5Y | Italy - Germany 10Y | US 2Y | ... | USD 3M x 10Y ATM vol. | USD 1Y x 10Y ATM vol. | USD 5Y x 5Y ATM vol. | USD 10Y x 20Y ATM vol. | US Govt. | European Govt. | US Corp. Bonds | European Corp. Bonds | US High Yield | European High Yield | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2005-06-10 | 0.167450 | 0.343277 | 1.016358 | 0.552594 | 1.021668 | 0.940069 | 0.834451 | 1.310608 | 0.148132 | 0.072246 | ... | -0.012515 | -0.059194 | 0.499113 | -0.047339 | 0.637123 | 1.088591 | 0.489450 | 1.186558 | 0.785765 | 0.764510 |
| 2005-07-06 | -0.027851 | -0.153893 | 0.436512 | 0.233379 | -0.263237 | 0.130086 | 0.121136 | 0.364581 | 0.006282 | 0.343949 | ... | 0.066179 | 0.112389 | -0.404934 | -0.546551 | -0.067827 | -0.391676 | -0.009991 | -0.261775 | 0.146020 | 0.152702 |
| 2005-08-03 | 0.812959 | 0.700608 | 0.642461 | 0.485735 | 0.500203 | 0.598946 | 0.094529 | -0.017407 | -0.128475 | 1.130855 | ... | -0.000089 | -0.154518 | -0.490581 | -0.667572 | -0.783406 | -0.477266 | -0.368965 | -0.483981 | 0.335740 | 0.297913 |
| 2005-08-31 | -0.227815 | -0.494001 | -0.472199 | -0.157813 | 0.068151 | -0.272288 | -0.136733 | -0.078345 | 0.031106 | -0.923843 | ... | -0.256881 | 0.067905 | 0.365885 | 0.830064 | 1.082408 | 0.970536 | 0.622093 | 0.970584 | -0.040484 | 0.091916 |
| 2005-09-27 | 0.129590 | -0.177045 | 0.639576 | 0.496616 | -0.018805 | -0.205800 | 0.158257 | 0.165498 | -0.029180 | 1.195255 | ... | -0.016656 | -0.103678 | -0.138478 | 0.103937 | -1.057891 | -0.078988 | -0.820468 | -0.122343 | -0.441008 | -0.110103 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-11-30 | 1.539804 | 1.496070 | 1.636306 | 0.834108 | 1.191457 | 1.095391 | 1.426497 | 1.353180 | -0.766798 | -1.460199 | ... | -1.180501 | -0.599871 | -0.557195 | -0.697827 | 1.907484 | 1.389646 | 2.098054 | 1.616380 | 0.531607 | 0.839217 |
| 2022-12-28 | -1.116731 | -1.578400 | -0.765623 | 0.184979 | -0.674976 | -0.736341 | -0.118090 | -0.373734 | 0.609143 | 0.187414 | ... | 0.368531 | 0.373703 | 0.470564 | 0.603149 | -0.691614 | -3.241830 | -0.447752 | -1.456655 | -0.350336 | -0.396110 |
| 2023-01-25 | 1.224006 | 1.065746 | 1.542955 | 1.125815 | 0.944331 | 0.991104 | 0.861899 | 0.883335 | -1.210078 | -1.068626 | ... | -1.495278 | -2.729024 | -2.764973 | -1.665996 | 1.995159 | 1.981227 | 1.791424 | 1.715428 | 1.002223 | 0.755746 |
| 2023-02-22 | -0.249724 | -0.224972 | 0.388311 | 0.721253 | -0.397561 | -0.426986 | -0.107307 | -0.032696 | 0.516941 | 2.666119 | ... | 0.186292 | 0.488219 | 0.813150 | 1.026723 | -2.117699 | -1.922162 | -1.646589 | -0.994542 | -0.788922 | -0.066579 |
| 2023-03-22 | -0.384663 | -0.368717 | -0.233170 | -0.979117 | -0.241827 | -0.681624 | -0.162214 | -0.269298 | -0.376711 | -3.561904 | ... | 0.749576 | 0.175557 | 0.508629 | -0.334764 | 2.125446 | 0.874675 | 0.964376 | -0.128285 | 0.017894 | -0.428692 |
232 rows × 40 columns
# Show correlation data
data_pca_corr = data_pca_normalised.corr()
data_pca_corr.style.format(formatter='{:.0%}')
| MSCI World | S&P 500 | Euro Stoxx 50 | Euro Stoxx Banks | CDX.IG 5Y | CDX.HY 5Y | iTraxx Europe 5Y | iTraxx Crossover 5Y | Italy - Germany 10Y | US 2Y | US 5Y | US 10Y | Germany 2Y | Germany 5Y | Germany 10Y | US B/E Inflation 10Y | Dollar Index | EUR/USD | USD/JPY | Precious Metals | Industrial Metals | Crude Oil | Agriculture | S&P 500 1M ATM imp. vol. | S&P 500 1M 80% imp. vol. | EUR/USD 3M ATM vol. | USD/JPY 3M ATM vol. | AUD/USD 3M ATM vol. | USD/CAD 3M ATM vol. | USD 2Y x 2Y ATM vol. | USD 3M x 10Y ATM vol. | USD 1Y x 10Y ATM vol. | USD 5Y x 5Y ATM vol. | USD 10Y x 20Y ATM vol. | US Govt. | European Govt. | US Corp. Bonds | European Corp. Bonds | US High Yield | European High Yield | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MSCI World | 100% | 98% | 89% | 71% | 79% | 85% | 71% | 75% | -25% | 22% | 22% | 17% | 25% | 20% | 15% | 60% | -53% | 48% | 4% | 33% | 54% | 41% | 27% | -81% | -65% | -46% | -46% | -69% | -53% | -20% | -44% | -41% | -33% | -39% | -16% | 7% | 53% | 49% | 77% | 74% |
| S&P 500 | 98% | 100% | 85% | 64% | 79% | 84% | 69% | 72% | -21% | 23% | 23% | 17% | 20% | 16% | 11% | 57% | -42% | 38% | 9% | 26% | 49% | 35% | 23% | -82% | -66% | -44% | -47% | -68% | -53% | -19% | -46% | -42% | -33% | -37% | -16% | 9% | 52% | 51% | 76% | 71% |
| Euro Stoxx 50 | 89% | 85% | 100% | 86% | 76% | 79% | 77% | 79% | -35% | 28% | 30% | 27% | 28% | 24% | 22% | 52% | -33% | 30% | 12% | 17% | 44% | 36% | 18% | -71% | -59% | -43% | -43% | -55% | -46% | -15% | -39% | -35% | -29% | -35% | -26% | 6% | 38% | 46% | 66% | 69% |
| Euro Stoxx Banks | 71% | 64% | 86% | 100% | 63% | 65% | 68% | 66% | -44% | 33% | 36% | 35% | 38% | 39% | 36% | 49% | -35% | 34% | 15% | 7% | 45% | 41% | 20% | -51% | -42% | -38% | -35% | -39% | -41% | -12% | -29% | -30% | -27% | -31% | -36% | -9% | 19% | 28% | 55% | 60% |
| CDX.IG 5Y | 79% | 79% | 76% | 63% | 100% | 83% | 90% | 78% | -30% | 33% | 31% | 26% | 24% | 24% | 22% | 51% | -30% | 28% | 12% | 17% | 36% | 26% | 13% | -71% | -61% | -43% | -43% | -53% | -49% | -25% | -41% | -41% | -34% | -28% | -27% | 2% | 40% | 42% | 72% | 70% |
| CDX.HY 5Y | 85% | 84% | 79% | 65% | 83% | 100% | 79% | 89% | -24% | 28% | 27% | 23% | 21% | 19% | 16% | 51% | -34% | 29% | 5% | 19% | 46% | 37% | 18% | -70% | -56% | -45% | -43% | -63% | -52% | -19% | -46% | -41% | -31% | -35% | -22% | 7% | 51% | 52% | 81% | 76% |
| iTraxx Europe 5Y | 71% | 69% | 77% | 68% | 90% | 79% | 100% | 87% | -46% | 32% | 34% | 34% | 27% | 28% | 28% | 47% | -25% | 23% | 14% | 9% | 35% | 24% | 8% | -60% | -51% | -51% | -41% | -48% | -51% | -12% | -38% | -32% | -22% | -21% | -33% | 4% | 33% | 44% | 65% | 69% |
| iTraxx Crossover 5Y | 75% | 72% | 79% | 66% | 78% | 89% | 87% | 100% | -34% | 31% | 34% | 33% | 27% | 23% | 21% | 49% | -26% | 21% | 13% | 15% | 45% | 36% | 13% | -63% | -52% | -54% | -45% | -59% | -53% | -6% | -41% | -29% | -16% | -24% | -32% | 6% | 41% | 51% | 75% | 76% |
| Italy - Germany 10Y | -25% | -21% | -35% | -44% | -30% | -24% | -46% | -34% | 100% | -5% | -12% | -18% | -14% | -21% | -22% | -21% | 25% | -29% | -13% | -2% | -16% | -10% | -2% | 17% | 15% | 25% | 8% | 10% | 19% | -1% | 5% | 7% | 5% | 2% | 16% | -15% | -2% | -12% | -14% | -23% |
| US 2Y | 22% | 23% | 28% | 33% | 33% | 28% | 32% | 31% | -5% | 100% | 89% | 76% | 59% | 62% | 60% | 34% | 19% | -15% | 53% | -16% | 18% | 25% | 13% | -30% | -25% | -24% | -26% | -22% | -30% | 15% | -10% | -3% | 1% | -5% | -81% | -51% | -36% | -18% | 14% | 18% |
| US 5Y | 22% | 23% | 30% | 36% | 31% | 27% | 34% | 34% | -12% | 89% | 100% | 94% | 59% | 70% | 76% | 43% | 16% | -13% | 56% | -19% | 20% | 28% | 15% | -28% | -22% | -25% | -18% | -19% | -29% | 36% | 6% | 17% | 18% | -2% | -95% | -63% | -45% | -26% | 14% | 19% |
| US 10Y | 17% | 17% | 27% | 35% | 26% | 23% | 34% | 33% | -18% | 76% | 94% | 100% | 54% | 69% | 81% | 46% | 14% | -11% | 52% | -20% | 22% | 30% | 17% | -20% | -16% | -27% | -13% | -12% | -27% | 46% | 13% | 29% | 31% | 6% | -98% | -66% | -52% | -30% | 11% | 17% |
| Germany 2Y | 25% | 20% | 28% | 38% | 24% | 21% | 27% | 27% | -14% | 59% | 59% | 54% | 100% | 89% | 74% | 33% | -23% | 30% | 32% | 2% | 29% | 29% | 32% | -21% | -20% | -27% | -14% | -21% | -25% | 16% | -1% | 2% | 2% | -12% | -54% | -66% | -23% | -34% | 14% | 18% |
| Germany 5Y | 20% | 16% | 24% | 39% | 24% | 19% | 28% | 23% | -21% | 62% | 70% | 69% | 89% | 100% | 93% | 37% | -18% | 25% | 37% | -4% | 25% | 25% | 29% | -17% | -15% | -21% | -7% | -12% | -21% | 27% | 8% | 13% | 12% | -7% | -68% | -80% | -38% | -48% | 7% | 11% |
| Germany 10Y | 15% | 11% | 22% | 36% | 22% | 16% | 28% | 21% | -22% | 60% | 76% | 81% | 74% | 93% | 100% | 39% | -8% | 14% | 42% | -10% | 21% | 26% | 27% | -13% | -11% | -20% | -5% | -6% | -18% | 37% | 13% | 23% | 22% | 3% | -79% | -85% | -46% | -50% | 5% | 10% |
| US B/E Inflation 10Y | 60% | 57% | 52% | 49% | 51% | 51% | 47% | 49% | -21% | 34% | 43% | 46% | 33% | 37% | 39% | 100% | -32% | 30% | 15% | 29% | 53% | 49% | 40% | -50% | -40% | -36% | -29% | -42% | -44% | 4% | -21% | -12% | -1% | -17% | -46% | -21% | 14% | 20% | 57% | 55% |
| Dollar Index | -53% | -42% | -33% | -35% | -30% | -34% | -25% | -26% | 25% | 19% | 16% | 14% | -23% | -18% | -8% | -32% | 100% | -97% | 46% | -55% | -40% | -31% | -33% | 31% | 26% | 23% | 5% | 36% | 14% | 21% | 11% | 22% | 24% | 26% | -17% | -1% | -37% | -12% | -32% | -30% |
| EUR/USD | 48% | 38% | 30% | 34% | 28% | 29% | 23% | 21% | -29% | -15% | -13% | -11% | 30% | 25% | 14% | 30% | -97% | 100% | -34% | 49% | 37% | 28% | 32% | -31% | -27% | -20% | -1% | -32% | -11% | -19% | -8% | -21% | -24% | -28% | 14% | -8% | 29% | -1% | 23% | 22% |
| USD/JPY | 4% | 9% | 12% | 15% | 12% | 5% | 14% | 13% | -13% | 53% | 56% | 52% | 32% | 37% | 42% | 15% | 46% | -34% | 100% | -36% | 1% | 10% | 0% | -17% | -20% | -16% | -14% | -9% | -24% | 25% | 2% | 11% | 16% | 3% | -53% | -35% | -35% | -20% | 3% | 6% |
| Precious Metals | 33% | 26% | 17% | 7% | 17% | 19% | 9% | 15% | -2% | -16% | -19% | -20% | 2% | -4% | -10% | 29% | -55% | 49% | -36% | 100% | 45% | 15% | 28% | -23% | -19% | -6% | 3% | -27% | -7% | -16% | 1% | -9% | -12% | -9% | 22% | 9% | 35% | 13% | 27% | 20% |
| Industrial Metals | 54% | 49% | 44% | 45% | 36% | 46% | 35% | 45% | -16% | 18% | 20% | 22% | 29% | 25% | 21% | 53% | -40% | 37% | 1% | 45% | 100% | 48% | 43% | -40% | -35% | -41% | -27% | -43% | -45% | 1% | -21% | -15% | -6% | -14% | -22% | -11% | 21% | 19% | 53% | 49% |
| Crude Oil | 41% | 35% | 36% | 41% | 26% | 37% | 24% | 36% | -10% | 25% | 28% | 30% | 29% | 25% | 26% | 49% | -31% | 28% | 10% | 15% | 48% | 100% | 35% | -31% | -30% | -23% | -22% | -29% | -32% | 8% | -11% | -5% | 3% | -10% | -32% | -18% | 8% | 13% | 40% | 38% |
| Agriculture | 27% | 23% | 18% | 20% | 13% | 18% | 8% | 13% | -2% | 13% | 15% | 17% | 32% | 29% | 27% | 40% | -33% | 32% | 0% | 28% | 43% | 35% | 100% | -19% | -21% | -21% | -11% | -22% | -19% | 8% | 3% | 3% | 6% | 0% | -18% | -23% | 5% | -3% | 27% | 26% |
| S&P 500 1M ATM imp. vol. | -81% | -82% | -71% | -51% | -71% | -70% | -60% | -63% | 17% | -30% | -28% | -20% | -21% | -17% | -13% | -50% | 31% | -31% | -17% | -23% | -40% | -31% | -19% | 100% | 84% | 47% | 52% | 68% | 54% | 24% | 48% | 45% | 35% | 39% | 22% | -0% | -38% | -34% | -61% | -53% |
| S&P 500 1M 80% imp. vol. | -65% | -66% | -59% | -42% | -61% | -56% | -51% | -52% | 15% | -25% | -22% | -16% | -20% | -15% | -11% | -40% | 26% | -27% | -20% | -19% | -35% | -30% | -21% | 84% | 100% | 46% | 47% | 63% | 49% | 23% | 44% | 42% | 32% | 36% | 17% | 0% | -33% | -27% | -52% | -45% |
| EUR/USD 3M ATM vol. | -46% | -44% | -43% | -38% | -43% | -45% | -51% | -54% | 25% | -24% | -25% | -27% | -27% | -21% | -20% | -36% | 23% | -20% | -16% | -6% | -41% | -23% | -21% | 47% | 46% | 100% | 59% | 72% | 79% | 3% | 42% | 23% | 6% | 11% | 25% | -2% | -30% | -33% | -57% | -55% |
| USD/JPY 3M ATM vol. | -46% | -47% | -43% | -35% | -43% | -43% | -41% | -45% | 8% | -26% | -18% | -13% | -14% | -7% | -5% | -29% | 5% | -1% | -14% | 3% | -27% | -22% | -11% | 52% | 47% | 59% | 100% | 61% | 62% | 25% | 49% | 41% | 26% | 20% | 15% | -9% | -30% | -35% | -49% | -46% |
| AUD/USD 3M ATM vol. | -69% | -68% | -55% | -39% | -53% | -63% | -48% | -59% | 10% | -22% | -19% | -12% | -21% | -12% | -6% | -42% | 36% | -32% | -9% | -27% | -43% | -29% | -22% | 68% | 63% | 72% | 61% | 100% | 77% | 17% | 43% | 33% | 21% | 24% | 10% | -8% | -55% | -42% | -65% | -59% |
| USD/CAD 3M ATM vol. | -53% | -53% | -46% | -41% | -49% | -52% | -51% | -53% | 19% | -30% | -29% | -27% | -25% | -21% | -18% | -44% | 14% | -11% | -24% | -7% | -45% | -32% | -19% | 54% | 49% | 79% | 62% | 77% | 100% | 14% | 45% | 33% | 16% | 18% | 26% | -3% | -30% | -32% | -59% | -53% |
| USD 2Y x 2Y ATM vol. | -20% | -19% | -15% | -12% | -25% | -19% | -12% | -6% | -1% | 15% | 36% | 46% | 16% | 27% | 37% | 4% | 21% | -19% | 25% | -16% | 1% | 8% | 8% | 24% | 23% | 3% | 25% | 17% | 14% | 100% | 55% | 80% | 84% | 44% | -41% | -36% | -35% | -26% | -18% | -10% |
| USD 3M x 10Y ATM vol. | -44% | -46% | -39% | -29% | -41% | -46% | -38% | -41% | 5% | -10% | 6% | 13% | -1% | 8% | 13% | -21% | 11% | -8% | 2% | 1% | -21% | -11% | 3% | 48% | 44% | 42% | 49% | 43% | 45% | 55% | 100% | 85% | 51% | 34% | -8% | -18% | -39% | -38% | -45% | -36% |
| USD 1Y x 10Y ATM vol. | -41% | -42% | -35% | -30% | -41% | -41% | -32% | -29% | 7% | -3% | 17% | 29% | 2% | 13% | 23% | -12% | 22% | -21% | 11% | -9% | -15% | -5% | 3% | 45% | 42% | 23% | 41% | 33% | 33% | 80% | 85% | 100% | 82% | 55% | -23% | -27% | -38% | -34% | -38% | -28% |
| USD 5Y x 5Y ATM vol. | -33% | -33% | -29% | -27% | -34% | -31% | -22% | -16% | 5% | 1% | 18% | 31% | 2% | 12% | 22% | -1% | 24% | -24% | 16% | -12% | -6% | 3% | 6% | 35% | 32% | 6% | 26% | 21% | 16% | 84% | 51% | 82% | 100% | 69% | -27% | -25% | -29% | -23% | -24% | -18% |
| USD 10Y x 20Y ATM vol. | -39% | -37% | -35% | -31% | -28% | -35% | -21% | -24% | 2% | -5% | -2% | 6% | -12% | -7% | 3% | -17% | 26% | -28% | 3% | -9% | -14% | -10% | 0% | 39% | 36% | 11% | 20% | 24% | 18% | 44% | 34% | 55% | 69% | 100% | -4% | -3% | -16% | -13% | -26% | -21% |
| US Govt. | -16% | -16% | -26% | -36% | -27% | -22% | -33% | -32% | 16% | -81% | -95% | -98% | -54% | -68% | -79% | -46% | -17% | 14% | -53% | 22% | -22% | -32% | -18% | 22% | 17% | 25% | 15% | 10% | 26% | -41% | -8% | -23% | -27% | -4% | 100% | 67% | 55% | 30% | -9% | -15% |
| European Govt. | 7% | 9% | 6% | -9% | 2% | 7% | 4% | 6% | -15% | -51% | -63% | -66% | -66% | -80% | -85% | -21% | -1% | -8% | -35% | 9% | -11% | -18% | -23% | -0% | 0% | -2% | -9% | -8% | -3% | -36% | -18% | -27% | -25% | -3% | 67% | 100% | 55% | 71% | 15% | 16% |
| US Corp. Bonds | 53% | 52% | 38% | 19% | 40% | 51% | 33% | 41% | -2% | -36% | -45% | -52% | -23% | -38% | -46% | 14% | -37% | 29% | -35% | 35% | 21% | 8% | 5% | -38% | -33% | -30% | -30% | -55% | -30% | -35% | -39% | -38% | -29% | -16% | 55% | 55% | 100% | 77% | 68% | 62% |
| European Corp. Bonds | 49% | 51% | 46% | 28% | 42% | 52% | 44% | 51% | -12% | -18% | -26% | -30% | -34% | -48% | -50% | 20% | -12% | -1% | -20% | 13% | 19% | 13% | -3% | -34% | -27% | -33% | -35% | -42% | -32% | -26% | -38% | -34% | -23% | -13% | 30% | 71% | 77% | 100% | 64% | 69% |
| US High Yield | 77% | 76% | 66% | 55% | 72% | 81% | 65% | 75% | -14% | 14% | 14% | 11% | 14% | 7% | 5% | 57% | -32% | 23% | 3% | 27% | 53% | 40% | 27% | -61% | -52% | -57% | -49% | -65% | -59% | -18% | -45% | -38% | -24% | -26% | -9% | 15% | 68% | 64% | 100% | 93% |
| European High Yield | 74% | 71% | 69% | 60% | 70% | 76% | 69% | 76% | -23% | 18% | 19% | 17% | 18% | 11% | 10% | 55% | -30% | 22% | 6% | 20% | 49% | 38% | 26% | -53% | -45% | -55% | -46% | -59% | -53% | -10% | -36% | -28% | -18% | -21% | -15% | 16% | 62% | 69% | 93% | 100% |
# Display the correlation heatmap
plt.figure(figsize=(16, 16))
plot_heatmap(data_pca_corr, annotate=False);
2.2 Analysis¶
# Perform PCA; obtain the loading matrix, scores, and explained variance
pca_model = pca.PCA(data=data_pca_normalised.values, standardize=True, normalize=False)
loadings = pca_model.loadings
scores = pca_model.scores
explained_variance = pca_model.eigenvals
explained_variance_ratio = explained_variance / explained_variance.sum() * 100
cumulative_explained_variance_ratio = pca_model.rsquare * 100
# Convert the numpy array loadings and scores to a pandas DataFrame for ease of viewing
scores_df = pd.DataFrame(
index=data_pca_normalised.index,
columns=[''.join(['PC', str(i + 1)]) for i in range(n_assets)],
data=scores)
loadings_df = pd.DataFrame(
index=data_pca_normalised.columns,
columns=[''.join(['PC', str(j + 1)]) for j in range(n_assets)],
data=loadings)
loadings_df.iloc[:, :6].style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | |
|---|---|---|---|---|---|---|
| MSCI World | -0.25 | -0.05 | -0.07 | +0.02 | -0.08 | +0.12 |
| S&P 500 | -0.24 | -0.05 | -0.01 | +0.03 | -0.07 | +0.15 |
| Euro Stoxx 50 | -0.23 | -0.01 | +0.02 | +0.03 | -0.22 | +0.06 |
| Euro Stoxx Banks | -0.20 | +0.05 | -0.03 | -0.03 | -0.26 | -0.06 |
| CDX.IG 5Y | -0.23 | -0.01 | +0.06 | -0.00 | -0.21 | +0.04 |
| CDX.HY 5Y | -0.24 | -0.03 | +0.04 | +0.05 | -0.14 | +0.12 |
| iTraxx Europe 5Y | -0.22 | +0.01 | +0.08 | +0.06 | -0.27 | -0.13 |
| iTraxx Crossover 5Y | -0.23 | +0.00 | +0.07 | +0.12 | -0.16 | -0.02 |
| Italy - Germany 10Y | +0.09 | -0.03 | +0.05 | -0.04 | +0.35 | +0.53 |
| US 2Y | -0.10 | +0.25 | +0.16 | -0.11 | +0.04 | +0.13 |
| US 5Y | -0.10 | +0.30 | +0.12 | -0.02 | -0.01 | +0.11 |
| US 10Y | -0.09 | +0.31 | +0.07 | +0.04 | -0.04 | +0.04 |
| Germany 2Y | -0.10 | +0.23 | -0.14 | -0.17 | +0.05 | -0.13 |
| Germany 5Y | -0.09 | +0.28 | -0.14 | -0.16 | -0.02 | -0.13 |
| Germany 10Y | -0.08 | +0.31 | -0.10 | -0.09 | -0.04 | -0.10 |
| US B/E Inflation 10Y | -0.18 | +0.09 | -0.11 | +0.08 | +0.06 | +0.18 |
| Dollar Index | +0.12 | +0.08 | +0.44 | +0.07 | +0.04 | +0.16 |
| EUR/USD | -0.11 | -0.05 | -0.44 | -0.12 | -0.05 | -0.20 |
| USD/JPY | -0.04 | +0.20 | +0.26 | -0.01 | +0.04 | -0.01 |
| Precious Metals | -0.07 | -0.09 | -0.35 | +0.04 | +0.11 | +0.16 |
| Industrial Metals | -0.16 | +0.03 | -0.20 | +0.09 | +0.18 | +0.09 |
| Crude Oil | -0.13 | +0.07 | -0.14 | +0.08 | +0.12 | +0.20 |
| Agriculture | -0.08 | +0.06 | -0.26 | +0.06 | +0.26 | +0.13 |
| S&P 500 1M ATM imp. vol. | +0.22 | +0.03 | -0.04 | +0.07 | -0.06 | -0.12 |
| S&P 500 1M 80% imp. vol. | +0.19 | +0.02 | -0.05 | +0.09 | -0.13 | -0.07 |
| EUR/USD 3M ATM vol. | +0.17 | -0.01 | -0.08 | -0.10 | -0.30 | +0.42 |
| USD/JPY 3M ATM vol. | +0.16 | +0.03 | -0.19 | +0.03 | -0.29 | +0.16 |
| AUD/USD 3M ATM vol. | +0.20 | +0.05 | -0.02 | -0.06 | -0.31 | +0.14 |
| USD/CAD 3M ATM vol. | +0.19 | -0.01 | -0.12 | -0.05 | -0.32 | +0.26 |
| USD 2Y x 2Y ATM vol. | +0.06 | +0.21 | -0.05 | +0.38 | -0.02 | -0.00 |
| USD 3M x 10Y ATM vol. | +0.14 | +0.13 | -0.18 | +0.22 | -0.15 | +0.07 |
| USD 1Y x 10Y ATM vol. | +0.13 | +0.18 | -0.12 | +0.37 | -0.05 | +0.01 |
| USD 5Y x 5Y ATM vol. | +0.09 | +0.17 | -0.05 | +0.43 | +0.07 | -0.06 |
| USD 10Y x 20Y ATM vol. | +0.10 | +0.08 | +0.00 | +0.36 | +0.07 | -0.16 |
| US Govt. | +0.09 | -0.31 | -0.10 | -0.01 | +0.02 | -0.08 |
| European Govt. | +0.01 | -0.29 | +0.12 | +0.17 | -0.10 | -0.09 |
| US Corp. Bonds | -0.12 | -0.26 | -0.04 | +0.17 | +0.06 | +0.01 |
| European Corp. Bonds | -0.13 | -0.22 | +0.14 | +0.25 | -0.06 | +0.06 |
| US High Yield | -0.22 | -0.07 | +0.02 | +0.16 | +0.05 | +0.11 |
| European High Yield | -0.22 | -0.05 | +0.03 | +0.20 | -0.04 | +0.06 |
# Summarise the variance explained per principal component in a DataFrame.
# A dummy "Dim 0" row (0% explained) is prepended so the scree plot starts at the origin.
var_explained_df = pd.DataFrame(
    {
        "Dim": list(range(len(explained_variance) + 1)),
        "% var. explained": np.insert(explained_variance_ratio, 0, 0),
        "% cum. var. explained": cumulative_explained_variance_ratio
    }
).set_index('Dim')
var_explained_df
| % var. explained | % cum. var. explained | |
|---|---|---|
| Dim | ||
| 0 | 0.000000 | 0.000000 |
| 1 | 34.950111 | 34.950111 |
| 2 | 19.612252 | 54.562363 |
| 3 | 7.861521 | 62.423884 |
| 4 | 6.735117 | 69.159001 |
| 5 | 4.431694 | 73.590695 |
| 6 | 3.200052 | 76.790747 |
| 7 | 2.573463 | 79.364210 |
| 8 | 2.358504 | 81.722714 |
| 9 | 1.910359 | 83.633073 |
| 10 | 1.750775 | 85.383848 |
| 11 | 1.631243 | 87.015092 |
| 12 | 1.431360 | 88.446452 |
| 13 | 1.307656 | 89.754108 |
| 14 | 1.282720 | 91.036828 |
| 15 | 1.145210 | 92.182038 |
| 16 | 1.060773 | 93.242811 |
| 17 | 0.900256 | 94.143067 |
| 18 | 0.781254 | 94.924322 |
| 19 | 0.754358 | 95.678680 |
| 20 | 0.657798 | 96.336478 |
| 21 | 0.631121 | 96.967599 |
| 22 | 0.457441 | 97.425039 |
| 23 | 0.391695 | 97.816735 |
| 24 | 0.321597 | 98.138332 |
| 25 | 0.290392 | 98.428724 |
| 26 | 0.264747 | 98.693471 |
| 27 | 0.228898 | 98.922369 |
| 28 | 0.209170 | 99.131539 |
| 29 | 0.162305 | 99.293845 |
| 30 | 0.143251 | 99.437096 |
| 31 | 0.128480 | 99.565576 |
| 32 | 0.097474 | 99.663050 |
| 33 | 0.093696 | 99.756746 |
| 34 | 0.060678 | 99.817424 |
| 35 | 0.057775 | 99.875199 |
| 36 | 0.041031 | 99.916229 |
| 37 | 0.034187 | 99.950416 |
| 38 | 0.020866 | 99.971283 |
| 39 | 0.017745 | 99.989028 |
| 40 | 0.010972 | 100.000000 |
# Cumulative explained-variance series; its "Dim" index runs 0..n, so a
# label equals its positional index.
cum_var = var_explained_df['% cum. var. explained']
# First dimension where cumulative explained variance reaches 75% / 80%
dim_75 = cum_var[cum_var >= 75].index[0]
dim_80 = cum_var[cum_var >= 80].index[0]
# Positional indices of those rows (used for scatter/line annotations below).
# The original recomputed these with O(n) generator scans; get_loc is direct.
point_index_75 = cum_var.index.get_loc(dim_75)
point_index_80 = cum_var.index.get_loc(dim_80)
plt.figure(figsize=(12,7))
# Scree curve of cumulative explained variance
plt.plot(range(len(explained_variance) + 1), var_explained_df['% cum. var. explained'], marker = 'x', linestyle = '--', color ='black')
# Shade the 75%-80% band (yellow) and the region above 80% (light blue)
plt.fill_between(range(len(explained_variance) + 1), 75, 80, color='yellow', alpha=0.5)
plt.fill_between(range(len(explained_variance) + 1), 80, var_explained_df['% cum. var. explained'].max()+5, color='lightblue', alpha=0.5)
# Highlight the first components reaching the 75% and 80% thresholds.
# (point_index_75 / point_index_80 were already computed above; the duplicate
# generator-scan recomputation that used to sit here has been removed.)
plt.scatter(point_index_75, var_explained_df['% cum. var. explained'][point_index_75], color='blue', edgecolor='black', zorder=5, s=80)
plt.scatter(point_index_80, var_explained_df['% cum. var. explained'][point_index_80], color='blue', edgecolor='black', zorder=5, s=80)
# Dashed guide lines from the axes to the highlighted points
plt.hlines(y=var_explained_df['% cum. var. explained'][point_index_75], xmin=0, xmax=dim_75, colors='black', linestyles='dashed', linewidth=0.5, label='75% Variance Explained')
plt.vlines(x=dim_75, ymin=0, ymax=var_explained_df['% cum. var. explained'][point_index_75], colors='black', linestyles='dashed', linewidth=0.5)
plt.hlines(y=var_explained_df['% cum. var. explained'][point_index_80], xmin=0, xmax=dim_80, colors='black', linestyles='dashed', linewidth=0.5, label='80% Variance Explained')
plt.vlines(x=dim_80, ymin=0, ymax=var_explained_df['% cum. var. explained'][point_index_80], colors='black', linestyles='dashed', linewidth=0.5)
plt.ylim(0, 105)
plt.xlim(0, len(explained_variance))
plt.yticks(np.arange(0, 105, 5))
plt.xticks(np.arange(0, len(explained_variance) + 1, 1))
plt.title("Explained Variance by Components")
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
Text(0, 0.5, 'Cumulative Explained Variance')
# Report how many components each threshold requires
print(f"The number of components needed to explain 75% of the variance is: {dim_75}")
print(f"The number of components needed to explain 80% of the variance is: {dim_80}")
The number of components needed to explain 75% of the variance is: 6 The number of components needed to explain 80% of the variance is: 8
# Regress each normalised factor on each of the first six PC score series
# and store the R-squared of every univariate fit.
pc_labels = ['PC' + str(i) for i in range(1, 7)]
rsquare_df = pd.DataFrame(index=data_pca_normalised.columns, columns=pc_labels)
for factor in data_pca_normalised.columns:
    for j, label in enumerate(pc_labels):
        fit = sm.OLS(data_pca_normalised[factor], sm.add_constant(scores_df.iloc[:, j])).fit()
        rsquare_df.at[factor, label] = fit.rsquared
# Display R² Dataframe
rsquare_df
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | |
|---|---|---|---|---|---|---|
| MSCI World | 0.847734 | 0.018132 | 0.015373 | 0.001434 | 0.011094 | 0.017129 |
| S&P 500 | 0.791634 | 0.019967 | 0.00022 | 0.001818 | 0.007687 | 0.030013 |
| Euro Stoxx 50 | 0.747764 | 0.000497 | 0.001075 | 0.001861 | 0.088351 | 0.00425 |
| Euro Stoxx Banks | 0.574965 | 0.017899 | 0.002831 | 0.002532 | 0.120362 | 0.004685 |
| CDX.IG 5Y | 0.719317 | 0.001376 | 0.011846 | 0.00001 | 0.075298 | 0.002283 |
| CDX.HY 5Y | 0.779932 | 0.009609 | 0.004181 | 0.007669 | 0.035685 | 0.01767 |
| iTraxx Europe 5Y | 0.66557 | 0.001616 | 0.022407 | 0.010025 | 0.130195 | 0.022045 |
| iTraxx Crossover 5Y | 0.726087 | 0.000088 | 0.016429 | 0.041374 | 0.047959 | 0.000557 |
| Italy - Germany 10Y | 0.10482 | 0.006027 | 0.007327 | 0.004699 | 0.221695 | 0.357315 |
| US 2Y | 0.147492 | 0.480846 | 0.083955 | 0.033085 | 0.003437 | 0.020123 |
| US 5Y | 0.142163 | 0.702009 | 0.042211 | 0.001648 | 0.000329 | 0.014355 |
| US 10Y | 0.108046 | 0.772846 | 0.017462 | 0.004516 | 0.002325 | 0.002426 |
| Germany 2Y | 0.144694 | 0.405909 | 0.062542 | 0.074811 | 0.004935 | 0.020125 |
| Germany 5Y | 0.109984 | 0.623796 | 0.060162 | 0.066282 | 0.000839 | 0.023122 |
| Germany 10Y | 0.082478 | 0.743492 | 0.028793 | 0.021847 | 0.003587 | 0.01257 |
| US B/E Inflation 10Y | 0.444796 | 0.060727 | 0.040191 | 0.01617 | 0.006221 | 0.043129 |
| Dollar Index | 0.192258 | 0.05414 | 0.613654 | 0.013217 | 0.002187 | 0.030921 |
| EUR/USD | 0.161131 | 0.023387 | 0.614468 | 0.040719 | 0.004057 | 0.05095 |
| USD/JPY | 0.022078 | 0.314967 | 0.213099 | 0.000244 | 0.002974 | 0.000039 |
| Precious Metals | 0.065329 | 0.070289 | 0.381549 | 0.005353 | 0.019842 | 0.032732 |
| Industrial Metals | 0.352014 | 0.005616 | 0.122303 | 0.021939 | 0.059225 | 0.011116 |
| Crude Oil | 0.22381 | 0.040065 | 0.063522 | 0.016739 | 0.024498 | 0.050765 |
| Agriculture | 0.093311 | 0.032897 | 0.206967 | 0.0089 | 0.116779 | 0.02036 |
| S&P 500 1M ATM imp. vol. | 0.666423 | 0.005311 | 0.005788 | 0.014132 | 0.00561 | 0.019025 |
| S&P 500 1M 80% imp. vol. | 0.506817 | 0.004585 | 0.006513 | 0.021419 | 0.027947 | 0.005879 |
| EUR/USD 3M ATM vol. | 0.42058 | 0.001755 | 0.019578 | 0.024658 | 0.15485 | 0.229027 |
| USD/JPY 3M ATM vol. | 0.346118 | 0.007402 | 0.116504 | 0.002239 | 0.144285 | 0.034107 |
| AUD/USD 3M ATM vol. | 0.575299 | 0.018893 | 0.001686 | 0.010511 | 0.169019 | 0.025335 |
| USD/CAD 3M ATM vol. | 0.48517 | 0.001017 | 0.047605 | 0.005852 | 0.182719 | 0.089845 |
| USD 2Y x 2Y ATM vol. | 0.047473 | 0.356244 | 0.009461 | 0.378987 | 0.000768 | 0.000007 |
| USD 3M x 10Y ATM vol. | 0.277087 | 0.125866 | 0.102971 | 0.130892 | 0.038537 | 0.006659 |
| USD 1Y x 10Y ATM vol. | 0.219929 | 0.240691 | 0.043299 | 0.373238 | 0.004693 | 0.000139 |
| USD 5Y x 5Y ATM vol. | 0.116821 | 0.224435 | 0.009351 | 0.500718 | 0.008305 | 0.00528 |
| USD 10Y x 20Y ATM vol. | 0.152617 | 0.047261 | 0.000036 | 0.340525 | 0.007533 | 0.033055 |
| US Govt. | 0.10792 | 0.767181 | 0.028529 | 0.00009 | 0.000967 | 0.007538 |
| European Govt. | 0.000543 | 0.679288 | 0.047878 | 0.076168 | 0.01787 | 0.011189 |
| US Corp. Bonds | 0.214779 | 0.517772 | 0.004407 | 0.073595 | 0.005459 | 0.00006 |
| European Corp. Bonds | 0.223653 | 0.377516 | 0.064352 | 0.170729 | 0.007107 | 0.00514 |
| US High Yield | 0.706725 | 0.043526 | 0.001479 | 0.064874 | 0.005255 | 0.01512 |
| European High Yield | 0.664684 | 0.019961 | 0.002604 | 0.108529 | 0.002195 | 0.003937 |
# For each of PC1-PC3, pick the factor with the highest univariate R-squared
best_factors_df = (
    rsquare_df[['PC1', 'PC2', 'PC3']]
    .astype(float)
    .idxmax()
    .to_frame()
    .T
    .rename(index={0: 'Best Factor'})
)
best_factors_df
| PC1 | PC2 | PC3 | |
|---|---|---|---|
| Best Factor | MSCI World | US 10Y | EUR/USD |
# For each of the first three PCs, plot the cumulative sum of its score
# series alongside the cumulative sum of its best-explained factor.
for pc in ('PC1', 'PC2', 'PC3'):
    best_factor = best_factors_df.at['Best Factor', pc]
    # Assemble the two cumulative series to compare
    plot_data = pd.DataFrame({
        best_factor: data_pca_normalised[best_factor].cumsum(),
        pc: scores_df[pc].cumsum(),
    })
    # Reuse the shared single-axis time-series helper
    fig, ax = plot_timeseries_one_axis(plot_data)
    ax.set_title(f"Cumulative Sum of {best_factor} and {pc}")
    ax.legend()
    plt.show()
3. Study of the Traditional Assets¶
# Read the 4-weekly changes of the traditional-asset universe
change_freq = '4w'
data_trad = pd.read_csv(
    filepath_or_buffer=f"Traditional Assets_Multi-asset PCA_CHANGES={change_freq}.csv",
    index_col=0, header=0, parse_dates=True,
).astype(np.float64)
n_assets_trad = data_trad.shape[1]
data_trad
| US Govt. Bonds | European Govt. Bonds | US IG Corp. Bonds | European IG Corp. Bonds | US High Yield | European High Yield | MSCI World | S&P 500 | Crude Oil | Industrial Metals | Precious Metals | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2005-06-10 | 0.010386 | 0.017674 | 0.013421 | 0.019308 | 0.029171 | 0.029893 | 0.014866 | 0.024410 | 0.028562 | 0.012891 | 0.005019 |
| 2005-07-06 | 0.001192 | -0.004078 | 0.002829 | -0.002100 | 0.009255 | 0.009683 | 0.003253 | -0.001373 | 0.112833 | -0.011761 | -0.022402 |
| 2005-08-03 | -0.008140 | -0.005336 | -0.004783 | -0.005385 | 0.015161 | 0.014480 | 0.047262 | 0.042098 | -0.020653 | 0.067694 | 0.033755 |
| 2005-08-31 | 0.016193 | 0.015940 | 0.016234 | 0.016116 | 0.003449 | 0.007675 | -0.006708 | -0.018139 | 0.105520 | 0.000730 | -0.027033 |
| 2005-09-27 | -0.011720 | 0.000517 | -0.014359 | -0.000039 | -0.009020 | 0.001002 | 0.011620 | -0.002921 | -0.064567 | 0.019788 | 0.063330 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-11-30 | 0.026953 | 0.022098 | 0.047535 | 0.025662 | 0.021258 | 0.032360 | 0.086199 | 0.083794 | -0.098184 | 0.092527 | 0.066374 |
| 2022-12-28 | -0.006943 | -0.045961 | -0.006454 | -0.019763 | -0.006197 | -0.008446 | -0.054265 | -0.074297 | -0.023005 | 0.026770 | 0.045765 |
| 2023-01-25 | 0.028097 | 0.030792 | 0.041032 | 0.027126 | 0.035909 | 0.029603 | 0.068924 | 0.060770 | 0.013441 | 0.066096 | 0.052185 |
| 2023-02-22 | -0.025541 | -0.026568 | -0.031878 | -0.012932 | -0.019851 | 0.002439 | -0.008393 | -0.004677 | -0.081716 | -0.072963 | -0.070958 |
| 2023-03-22 | 0.029796 | 0.014531 | 0.023493 | -0.000127 | 0.005266 | -0.009522 | -0.014729 | -0.011974 | -0.045359 | -0.064751 | 0.053697 |
232 rows × 11 columns
# Standardise every column to zero mean and unit standard deviation; show the result
data_trad_normalised = data_trad.sub(data_trad.mean()).div(data_trad.std())
data_trad_normalised
| US Govt. Bonds | European Govt. Bonds | US IG Corp. Bonds | European IG Corp. Bonds | US High Yield | European High Yield | MSCI World | S&P 500 | Crude Oil | Industrial Metals | Precious Metals | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2005-06-10 | 0.637123 | 1.088591 | 0.489450 | 1.186558 | 0.785765 | 0.764510 | 0.183719 | 0.345216 | 0.322210 | 0.165716 | 0.007486 |
| 2005-07-06 | -0.067827 | -0.391676 | -0.009991 | -0.261775 | 0.146020 | 0.152702 | -0.037128 | -0.159374 | 1.077765 | -0.203723 | -0.485432 |
| 2005-08-03 | -0.783406 | -0.477266 | -0.368965 | -0.483981 | 0.335740 | 0.297913 | 0.799827 | 0.691387 | -0.119036 | 0.987022 | 0.524058 |
| 2005-08-31 | 1.082408 | 0.970536 | 0.622093 | 0.970584 | -0.040484 | 0.091916 | -0.226582 | -0.487514 | 1.012196 | -0.016538 | -0.568689 |
| 2005-09-27 | -1.057891 | -0.078988 | -0.820468 | -0.122343 | -0.441008 | -0.110103 | 0.121989 | -0.189670 | -0.512761 | 0.269072 | 1.055710 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-11-30 | 1.907484 | 1.389646 | 2.098054 | 1.616380 | 0.531607 | 0.839217 | 1.540318 | 1.507397 | -0.814159 | 1.359171 | 1.110415 |
| 2022-12-28 | -0.691614 | -3.241830 | -0.447752 | -1.456655 | -0.350336 | -0.396110 | -1.131010 | -1.586561 | -0.140126 | 0.373712 | 0.739942 |
| 2023-01-25 | 1.995159 | 1.981227 | 1.791424 | 1.715428 | 1.002223 | 0.755746 | 1.211784 | 1.056797 | 0.186639 | 0.963064 | 0.855358 |
| 2023-02-22 | -2.117699 | -1.922162 | -1.646589 | -0.994542 | -0.788922 | -0.066579 | -0.258620 | -0.224048 | -0.666516 | -1.120919 | -1.358291 |
| 2023-03-22 | 2.125446 | 0.874675 | 0.964376 | -0.128285 | 0.017894 | -0.428692 | -0.379119 | -0.366858 | -0.340540 | -0.997850 | 0.882534 |
232 rows × 11 columns
# Fit a PCA on the standardised traditional-asset changes
pca_trad = pca.PCA(data=data_trad_normalised.values, standardize=True, normalize=False)
loadings_trad = pca_trad.loadings
scores_trad = pca_trad.scores
explained_variance_trad = pca_trad.eigenvals
# Percentage of variance explained by each component, and its running total
explained_variance_ratio_trad = 100 * explained_variance_trad / explained_variance_trad.sum()
cumulative_explained_variance_ratio_trad = np.cumsum(explained_variance_ratio_trad)
# Label the components PC1..PCn
pc_names_trad = ['PC' + str(k + 1) for k in range(n_assets_trad)]
# Loadings: one row per asset, one column per component
loadings_trad_df = pd.DataFrame(data=loadings_trad,
                                index=data_trad_normalised.columns,
                                columns=pc_names_trad)
# Scores: one row per date, one column per component
scores_trad_df = pd.DataFrame(data=scores_trad,
                              index=data_trad_normalised.index,
                              columns=pc_names_trad)
# Explained-variance summary (Dim starts at 1: no dummy zero row here)
var_explained_trad_df = pd.DataFrame(
    {
        "Dim": list(range(1, len(explained_variance_trad) + 1)),
        "% var. explained": explained_variance_ratio_trad,
        "% cum. var. explained": cumulative_explained_variance_ratio_trad
    }
).set_index('Dim')
var_explained_trad_df
| % var. explained | % cum. var. explained | |
|---|---|---|
| Dim | ||
| 1 | 46.628907 | 46.628907 |
| 2 | 22.968751 | 69.597658 |
| 3 | 10.418276 | 80.015934 |
| 4 | 6.599419 | 86.615353 |
| 5 | 4.322212 | 90.937565 |
| 6 | 3.884804 | 94.822369 |
| 7 | 3.014014 | 97.836383 |
| 8 | 1.055166 | 98.891549 |
| 9 | 0.646350 | 99.537898 |
| 10 | 0.346765 | 99.884663 |
| 11 | 0.115337 | 100.000000 |
# Correlation of each traditional asset with the first two principal components,
# gathered directly into a display DataFrame
correlation_trad_df = pd.DataFrame(
    {
        'PC1': data_trad_normalised.corrwith(scores_trad_df['PC1']),
        'PC2': data_trad_normalised.corrwith(scores_trad_df['PC2']),
    },
    index=data_trad_normalised.columns,
)
# Colour negative correlations red and non-negative ones blue
correlation_trad_df.style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
| PC1 | PC2 | |
|---|---|---|
| US Govt. Bonds | -0.05 | +0.88 |
| European Govt. Bonds | -0.32 | +0.83 |
| US IG Corp. Bonds | -0.77 | +0.52 |
| European IG Corp. Bonds | -0.76 | +0.48 |
| US High Yield | -0.92 | -0.12 |
| European High Yield | -0.89 | -0.12 |
| MSCI World | -0.88 | -0.25 |
| S&P 500 | -0.86 | -0.22 |
| Crude Oil | -0.42 | -0.50 |
| Industrial Metals | -0.59 | -0.43 |
| Precious Metals | -0.41 | +0.03 |
plt.figure(figsize=(10, 6))
plt.scatter(correlation_trad_df['PC1'], correlation_trad_df['PC2'], alpha=0.7)
# Label each point with its asset name
for asset in correlation_trad_df.index:
    plt.text(correlation_trad_df.loc[asset, 'PC1'], correlation_trad_df.loc[asset, 'PC2'], asset)
# Title and axis labels
plt.title("Correlation with PC1 and PC2")
plt.xlabel("Correlation with PC1")
plt.ylabel("Correlation with PC2")
plt.grid(True)
plt.show()
4. Systematic Strategies¶
# Read the 4-weekly changes of the systematic-strategy universe
change_freq = '4w'
data_syst = pd.read_csv(
    filepath_or_buffer=f"Systematic Strategies_Multi-asset PCA_CHANGES={change_freq}.csv",
    index_col=0, header=0, parse_dates=True,
).astype(np.float64)
n_assets_syst = data_syst.shape[1]
data_syst
| Global Quality Income vs. Index | Global Equity Machine Learning | European Value vs. Index | Commodity Congestion | Multi-asset Trend | Repo Carry | Long Rates Vol. | Tail Hedge -SDV | Tail Hedge - SCV | Tail Hedge - Dynamic Put Ratio | Put Spread | Intraday Trend Following | Strong vs. Weak Balance Sheets | FX Carry | FX Value | Short Weekly Tail Puts | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2005-06-10 | 0.051755 | NaN | 0.014358 | 0.024636 | 0.026185 | NaN | NaN | -0.3152 | NaN | NaN | -0.005487 | NaN | NaN | 0.011203 | 0.017098 | NaN |
| 2005-07-06 | 0.013968 | NaN | 0.008676 | 0.003710 | 0.011092 | NaN | NaN | -0.9980 | NaN | NaN | -0.000930 | NaN | NaN | -0.009567 | 0.019454 | NaN |
| 2005-08-03 | -0.045271 | NaN | 0.010801 | 0.000871 | -0.003290 | NaN | NaN | 0.0469 | NaN | NaN | -0.006943 | NaN | NaN | 0.005324 | -0.007057 | NaN |
| 2005-08-31 | 0.008821 | NaN | 0.007892 | 0.002410 | 0.020470 | NaN | NaN | -0.3767 | NaN | NaN | 0.002420 | NaN | NaN | -0.003842 | 0.013888 | NaN |
| 2005-09-27 | 0.028851 | NaN | -0.004355 | 0.002619 | 0.012372 | NaN | NaN | -0.5122 | NaN | NaN | -0.000688 | NaN | NaN | 0.020388 | 0.008387 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-11-30 | -0.047987 | -0.013439 | 0.011862 | 0.018969 | -0.038204 | 0.008922 | -0.000696 | 1.6405 | 0.011194 | -0.002338 | -0.024075 | 0.009247 | 0.017270 | -0.025170 | 0.014000 | 0.002686 |
| 2022-12-28 | 0.017136 | 0.003181 | 0.011447 | 0.006698 | 0.002398 | -0.004487 | 0.016413 | -4.3861 | -0.015289 | 0.005956 | 0.022461 | -0.012614 | 0.025709 | -0.023997 | 0.011885 | 0.002531 |
| 2023-01-25 | -0.050958 | -0.003026 | 0.033966 | 0.005789 | -0.030022 | 0.012565 | -0.028874 | -0.7273 | 0.016171 | 0.004102 | -0.023007 | -0.002602 | -0.014000 | -0.009368 | 0.009310 | 0.001728 |
| 2023-02-22 | 0.020082 | 0.021498 | 0.011651 | 0.019872 | 0.000874 | -0.002980 | 0.022189 | -0.4369 | -0.004838 | 0.000700 | 0.001410 | 0.001495 | 0.003643 | 0.011919 | -0.007237 | 0.002778 |
| 2023-03-22 | -0.011847 | 0.006434 | -0.025383 | -0.003288 | -0.039426 | -0.004457 | -0.004162 | 0.6684 | 0.000329 | -0.000871 | 0.002799 | -0.003091 | 0.039701 | -0.015427 | 0.016647 | 0.004708 |
232 rows × 16 columns
Some columns contain missing values. Let's check the proportion of missing values in each column :
# Count of null values per strategy, most-missing first
null = data_syst.isnull().sum().sort_values(ascending=False)
# Same counts expressed as a percentage of the sample length
percent_missing = (data_syst.isnull().sum() / data_syst.shape[0] * 100).sort_values(ascending=False)
missing_data = pd.concat([null, percent_missing], axis=1, keys=['Total missing', 'Percent missing'])
# NOTE: the previous `.rename(columns={"index": " column name"})` was a no-op
# (no column named "index" exists after concat with explicit keys) and has
# been removed.
missing_data.sort_values(by='Total missing', ascending=False)
| Total missing | Percent missing | |
|---|---|---|
| Tail Hedge - SCV | 147 | 63.362069 |
| Long Rates Vol. | 96 | 41.379310 |
| Repo Carry | 86 | 37.068966 |
| Intraday Trend Following | 35 | 15.086207 |
| Short Weekly Tail Puts | 35 | 15.086207 |
| Strong vs. Weak Balance Sheets | 34 | 14.655172 |
| Global Equity Machine Learning | 9 | 3.879310 |
| Tail Hedge - Dynamic Put Ratio | 9 | 3.879310 |
| Global Quality Income vs. Index | 0 | 0.000000 |
| European Value vs. Index | 0 | 0.000000 |
| Commodity Congestion | 0 | 0.000000 |
| Multi-asset Trend | 0 | 0.000000 |
| Tail Hedge -SDV | 0 | 0.000000 |
| Put Spread | 0 | 0.000000 |
| FX Carry | 0 | 0.000000 |
| FX Value | 0 | 0.000000 |
Many columns have too many missing values, so we cannot fill them in with sample means, medians or other rudimentary interpolations, which would drown the signal of the data in noise. Moreover, if those columns were removed, we would risk losing too much information. So, we will use probabilistic PCA, which computes factors even when some of the data are missing.
# Instantiate the Probabilistic PCA model (handles missing observations natively)
ppca = PPCA()
# Number of principal components to retain
n_pc = 2
# Fit the PPCA model on the raw (NaN-containing) strategy changes
ppca.fit(data=data_syst.values, d=n_pc, verbose=True)
# Project the data onto the fitted components
component_mat = ppca.transform()
1.0 0.7729708684588203 0.9301835983744469 2.7491901090738775 0.2389130331871705 0.018428175901263777 0.04504531876524365 0.03674361976616114 0.023715858867782602 0.012813721429227765 0.005225934986834435 0.000559019916584913 0.001959939193413973 0.0030679093783592126 0.0033343649110932905 0.0031500720705690544 0.0027608113928707656 0.002309504496337711 0.0018721304818993811 0.001483922415887351 0.0011568400493260356 0.0008905228482347649 0.0006788107955162381 0.0005134281463505808 0.00038593174355594506 0.0002886403032626994 0.00021499143719405645 0.00015959557091260734 0.00011814353042938208 8.725560823674705e-05
# Collect fitted quantities from the PPCA object
variance_explained = ppca.var_exp
components = ppca.data
model_params = ppca.C
# Score series for the retained components, indexed by date
scores_syst_df = pd.DataFrame(data=component_mat,
                              index=data_syst.index,
                              columns=['PC' + str(i + 1) for i in range(n_pc)])
scores_syst_df
| PC1 | PC2 | |
|---|---|---|
| 2005-06-10 | -0.885883 | 1.143596 |
| 2005-07-06 | -0.316118 | -0.200146 |
| 2005-08-03 | 1.175464 | 0.318211 |
| 2005-08-31 | -0.413634 | -0.317862 |
| 2005-09-27 | -0.105973 | -0.312105 |
| ... | ... | ... |
| 2022-11-30 | 0.331661 | 2.103474 |
| 2022-12-28 | -0.692903 | -2.270541 |
| 2023-01-25 | 2.094534 | 2.072162 |
| 2023-02-22 | -0.031694 | -0.316743 |
| 2023-03-22 | -0.501151 | -0.945952 |
232 rows × 2 columns
# Correlation of each systematic strategy with the first two PPCA components,
# gathered directly into a display DataFrame
correlation_syst_df = pd.DataFrame(
    {
        'PC1': data_syst.corrwith(scores_syst_df['PC1']),
        'PC2': data_syst.corrwith(scores_syst_df['PC2']),
    },
    index=data_syst.columns,
)
# Colour negative correlations red and non-negative ones blue
correlation_syst_df.style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
| PC1 | PC2 | |
|---|---|---|
| Global Quality Income vs. Index | -0.54 | -0.38 |
| Global Equity Machine Learning | +0.14 | -0.16 |
| European Value vs. Index | +0.67 | +0.34 |
| Commodity Congestion | -0.17 | +0.55 |
| Multi-asset Trend | -0.45 | -0.25 |
| Repo Carry | -0.17 | +0.71 |
| Long Rates Vol. | -0.48 | -0.23 |
| Tail Hedge -SDV | -0.64 | +0.55 |
| Tail Hedge - SCV | -0.69 | +0.67 |
| Tail Hedge - Dynamic Put Ratio | -0.78 | +0.34 |
| Put Spread | -0.62 | -0.56 |
| Intraday Trend Following | -0.70 | +0.36 |
| Strong vs. Weak Balance Sheets | -0.54 | -0.29 |
| FX Carry | +0.60 | +0.31 |
| FX Value | -0.18 | +0.17 |
| Short Weekly Tail Puts | -0.17 | +0.30 |
plt.figure(figsize=(10, 6))
plt.scatter(correlation_syst_df['PC1'], correlation_syst_df['PC2'], alpha=0.7)
# Label each point with its strategy name
for strategy in correlation_syst_df.index:
    plt.text(correlation_syst_df.loc[strategy, 'PC1'], correlation_syst_df.loc[strategy, 'PC2'], strategy)
# Title and axis labels
plt.title("Correlation with PC1 and PC2")
plt.xlabel("Correlation with PC1")
plt.ylabel("Correlation with PC2")
plt.grid(True)
plt.show()
5. Portfolio Optimization¶
Data
# Daily performance levels of the six equity-factor indices
equity_data = pd.read_csv(
    'equity_factor_performance_data.csv',
    index_col=0,
    header=0,
    parse_dates=True,
).astype(np.float64)
equity_data
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| 2018-09-03 | 3147.275589 | 2521.365122 | 4447.639970 | 7230.019236 | 5367.396587 | 2746.458678 |
| 2018-09-04 | 3147.869877 | 2517.854224 | 4423.624532 | 7240.434396 | 5339.050310 | 2744.511885 |
| 2018-09-05 | 3123.127074 | 2512.737537 | 4432.075319 | 7221.320382 | 5358.611721 | 2765.911701 |
| 2018-09-06 | 3106.574794 | 2518.676856 | 4399.732989 | 7182.130965 | 5311.798170 | 2752.442728 |
| 2018-09-07 | 3101.329324 | 2510.221028 | 4376.879336 | 7139.770025 | 5291.714564 | 2749.406337 |
| ... | ... | ... | ... | ... | ... | ... |
| 2023-08-28 | 4722.945678 | 4272.219410 | 5360.124710 | 11363.308993 | 7459.215794 | 4465.448906 |
| 2023-08-29 | 4806.024404 | 4309.305708 | 5436.687835 | 11535.042889 | 7543.478208 | 4482.673495 |
| 2023-08-30 | 4841.395229 | 4326.786715 | 5465.827827 | 11598.645379 | 7544.416986 | 4483.233991 |
| 2023-08-31 | 4852.078911 | 4308.159090 | 5473.408710 | 11640.134272 | 7521.389497 | 4450.985502 |
| 2023-09-01 | 4884.347114 | 4317.863443 | 5501.348987 | 11709.032625 | 7570.535386 | 4448.521921 |
1305 rows × 6 columns
Next, we find the simple daily returns for each of the 6 assets using the pct_change() method, since our data object is a pandas DataFrame. We use simple returns since they have the property of being asset-additive, which is necessary since we need to compute portfolio returns:
# Compute daily simple returns. The first row (cut-off date 2018-09-03) has no
# prior observation, so pct_change yields NaN there and we drop it.
daily_returns = equity_data.pct_change().dropna(axis=0, how='any')
# Examine the last 5 rows
daily_returns.tail(n=5)
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| 2023-08-28 | 0.005015 | 0.006035 | 0.005886 | 0.006481 | 0.007407 | 0.003419 |
| 2023-08-29 | 0.017590 | 0.008681 | 0.014284 | 0.015113 | 0.011296 | 0.003857 |
| 2023-08-30 | 0.007360 | 0.004057 | 0.005360 | 0.005514 | 0.000124 | 0.000125 |
| 2023-08-31 | 0.002207 | -0.004305 | 0.001387 | 0.003577 | -0.003052 | -0.007193 |
| 2023-09-01 | 0.006650 | 0.002253 | 0.005105 | 0.005919 | 0.006534 | -0.000553 |
The simple daily returns may be visualized using line charts, density plots, and histograms, which are covered in my other post on visualizing asset data. Even though the visualizations in that post use the ggplot2 package in R, the plotnine package, or any other Python graphics libraries, can be employed to produce them in Python. For now, let us annualize the daily returns over the 5-year period from 2018-09-03 to 2023-09-01. We assume the number of trading days in a year is computed as follows:
# Annualize the mean daily simple returns assuming 253 trading days per year
daily_returns.mean() * 253
Growth 0.117797 Low Risk 0.122932 Low Size 0.084286 Momentum 0.118064 Value 0.103472 Quality Income 0.111360 dtype: float64
The annualized variance-covariance matrix of the returns can be computed using built-in pandas method cov():
# Annualize the variance-covariance matrix with the same 253-day convention
daily_returns.cov() * 253
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| Growth | 0.064674 | 0.044142 | 0.062492 | 0.050407 | 0.056067 | 0.034156 |
| Low Risk | 0.044142 | 0.036917 | 0.042614 | 0.039387 | 0.040846 | 0.029625 |
| Low Size | 0.062492 | 0.042614 | 0.085696 | 0.053194 | 0.077119 | 0.043524 |
| Momentum | 0.050407 | 0.039387 | 0.053194 | 0.048724 | 0.051333 | 0.034334 |
| Value | 0.056067 | 0.040846 | 0.077119 | 0.051333 | 0.073120 | 0.043255 |
| Quality Income | 0.034156 | 0.029625 | 0.043524 | 0.034334 | 0.043255 | 0.035410 |
Minimize Risk given Levels of Return
$$\min _{\vec{w}} \sqrt{\vec{w}^T \hat{\Sigma} \vec{w}}$$subject to $$ \begin{aligned} & \vec{w}^T \hat{\mu}=\overline{r}_P \\ & \vec{w}^T \vec{1}=1 \quad \text { (Full investment) } \\ & \overrightarrow{0} \leq \vec{w} \leq \vec{1} \quad \text { (Long only) } \end{aligned} $$
Maximize Return given Levels of Risk
$$\max _{\vec{w}} \vec{w}^T \hat{\mu}$$subject to
$$\begin{aligned} & \vec{w}^T \hat{\Sigma} \vec{w}=\overline{\sigma}_P \\ & \vec{w}^T \overrightarrow{1}=1 \quad \text { (Full investment) } \\ & \overrightarrow{0} \leq \vec{w} \leq \overrightarrow{1} \quad \text { (Long only) }\end{aligned}$$In absence of other constraints, the above model is loosely referred to as the "unconstrained" portfolio optimization model. Solving the mathematical model yields a set of optimal weights representing a set of optimal portfolios.
5.1 Monte Carlo Simulation¶
The first task is to simulate a random set of portfolios to visualize the risk-return profiles of our given set of assets. We use a for loop to simulate random vectors of asset weights, computing the expected portfolio return and standard deviation for each permutation of random weights. Again, we ensure that each random weight vector is subject to the long-positions-only and full-investment constraints.
Monte Carlo Simulation
The empty containers we instantiate are lists; they are mutable and so growing them will not be memory inefficient.
# Containers for the simulated portfolio statistics
list_portfolio_returns = []
list_portfolio_sd = []
# Simulate 5000 random long-only, fully-invested weight vectors
for _ in range(5000):
    # Uniform random floats in the half-open interval [0.0, 1.0)
    weights = np.random.random(size=equity_data.shape[1])
    # Rescale in place so the weights sum to one (full investment)
    weights /= np.sum(weights)
    # Record expected return and volatility for this weight vector
    list_portfolio_returns.append(portfolio_returns(daily_returns, weights))
    list_portfolio_sd.append(portfolio_sd(daily_returns, weights))
# Convert the accumulated lists to numpy arrays
port_returns = np.array(list_portfolio_returns)
port_sd = np.array(list_portfolio_sd)
Let us examine the simulation results. In particular, the highest and the lowest expected portfolio returns are as follows:
# Extremes of the simulated expected portfolio returns
max_expected_return = round(port_returns.max(), 4)
min_expected_return = round(port_returns.min(), 4)
print(f'Highest expected portfolio returns : {max_expected_return}\n'
      f'Lowest expected portfolio returns : {min_expected_return}')
Highest expected portfolio returns : 0.1196 Lowest expected portfolio returns : 0.0967
On the other hand, the highest and lowest volatility measures are recorded as:
# Extremes of the simulated portfolio volatilities
max_vol = round(port_sd.max(), 4)
min_vol = round(port_sd.min(), 4)
print(f'Highest volatility measure : {max_vol}\n'
      f'Lowest volatility measure : {min_vol}')
Highest volatility measure : 0.2671 Lowest volatility measure : 0.1909
# Sharpe ratio per simulated portfolio (risk-free rate assumed zero).
# port_returns and port_sd are numpy arrays, so a vectorised elementwise
# division replaces the Python-level zip/list-comprehension.
sharpe_ratio = port_returns / port_sd
# Create a scatter plot of the risk-return cloud, coloured by Sharpe ratio
fig = px.scatter(
    x=port_sd, y=port_returns, color=sharpe_ratio,
    labels={'x': 'Portfolio Standard Deviation (Annualized)', 'y': 'Expected Portfolio Return (Annualized)'},
    color_continuous_scale='Viridis', # You can choose any color scale
)
# Update layout
fig.update_layout(
    title='Mean-Standard Deviation Diagram',
    xaxis_tickformat='.2%',
    yaxis_tickformat='.2%',
)
# Add colorbar title
fig.update_layout(coloraxis_colorbar=dict(title="Sharpe Ratio"))
# Show the plot
fig.show()
Each point in the diagram above represents a permutation of expected-return-standard-deviation value pair. The points are color coded such that the magnitudes of the Sharpe ratios, defined as $SR = \frac{\mu_P - r_f}{\sigma_P}$, can be readily observed for each expected-return-standard-deviation pairing. For simplicity, we assume that $r_f \equiv 0$.
5.2 Optimal Portfolio¶
We will use a dictionary to represent the equality constraint:
# SLSQP equality constraint: the portfolio must be fully invested, i.e. the
# weights sum to exactly 1 (the lambda returns 0 when satisfied).
# NOTE: the original `({...})` had no trailing comma and therefore produced a
# bare dict, not the tuple the surrounding text describes; scipy accepts both,
# but the one-element tuple below matches the stated intent.
constraints = (
    {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
)
Next, the bound values for the weights:
# One (min, max) = (0, 1) pair per asset: long-only weights with no leverage.
# len(weights) pairs are collected into an outer tuple for scipy.minimize.
bounds = tuple((0, 1) for _ in weights)
We also need to supply a starting list of weights, which essentially functions as an initial guess. For our purposes, this will be an equal weight array:
# Equal-weight starting point for the optimiser: 1/N in each of the N columns
# of equity_data.
n_assets = equity_data.shape[1]
equal_weights = np.full(n_assets, 1 / n_assets)
Minimum Variance Portfolio
# Minimum-variance optimisation: minimise portfolio standard deviation subject
# to the fully-invested constraint and long-only bounds, starting from the
# equal-weight allocation.
min_sd_results = minimize(
    lambda w: portfolio_sd(daily_returns, w),  # objective: portfolio volatility
    equal_weights,                             # initial guess
    method='SLSQP',                            # handles bounds + eq constraints
    bounds=bounds,
    constraints=constraints,
)
Minimum Variance Weights
# Present the optimal weight vector as a one-row DataFrame labelled 'Weights',
# with one column per factor index.
optimal_weights_df = pd.DataFrame(
    data=[min_sd_results.x],
    columns=equity_data.columns,
    index=['Weights'],
)
optimal_weights_df
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| Weights | 2.832792e-17 | 0.434213 | 0.0 | 3.469447e-17 | 0.0 | 0.565787 |
Expected return (Minimum variance portfolio)
# Expected return of the minimum-variance portfolio (attribute access .x is
# equivalent to the ["x"] item lookup on scipy's OptimizeResult).
min_sd_port_return = portfolio_returns(daily_returns, min_sd_results.x)
print(f'Expected return (Minimum variance portfolio) : {round(min_sd_port_return, 4)}')
Expected return (Minimum variance portfolio) : 0.1164
Standard deviation (Minimum variance portfolio)
# Volatility (standard deviation) of the minimum-variance portfolio.
min_sd_port_sd = portfolio_sd(daily_returns, min_sd_results.x)
print(f'Standard deviation (Minimum variance portfolio) : {round(min_sd_port_sd, 4)}')
Standard deviation (Minimum variance portfolio) : 0.1813
Sharpe ratio (Minimum variance portfolio)
# Sharpe ratio of the minimum-variance portfolio, with the risk-free rate
# assumed to be zero (SR = mu_P / sigma_P, as stated in the text above).
min_sd_port_sharpe = min_sd_port_return / min_sd_port_sd
print(f'Sharpe ratio (Minimum variance portfolio) : {round(min_sd_port_sharpe, 4)}')
Sharpe ratio (Minimum variance portfolio) : 0.6421
Portfolio returns (Minimum variance portfolio)
# Daily return series of the minimum-variance portfolio: the weighted sum of
# the individual factor-index returns (matrix product of returns and weights).
min_var_returns = daily_returns @ min_sd_results.x
# Arithmetic cumulative return path (simple running sum, not compounded).
cumulative_portfolio_returns = min_var_returns.cumsum()
print(f'Portfolio returns :\n{min_var_returns}')
Portfolio returns :
2018-09-04 -0.001006
2018-09-05 0.003529
2018-09-06 -0.001729
2018-09-07 -0.002082
2018-09-10 0.003524
...
2023-08-28 0.004555
2023-08-29 0.005952
2023-08-30 0.001832
2023-08-31 -0.005939
2023-09-01 0.000665
Length: 1304, dtype: float64
# Display the running cumulative (arithmetic) return series computed above.
print(f'Cumulative sum of the portfolio returns :\n{cumulative_portfolio_returns}')
Cumulative sum of the portfolio returns :
2018-09-04 -0.001006
2018-09-05 0.002524
2018-09-06 0.000795
2018-09-07 -0.001287
2018-09-10 0.002237
...
2023-08-28 0.597355
2023-08-29 0.603306
2023-08-30 0.605138
2023-08-31 0.599199
2023-09-01 0.599864
Length: 1304, dtype: float64
# Plot the cumulative return path of the minimum-variance portfolio.
ax = cumulative_portfolio_returns.plot(figsize=(10, 6), title='Cumulative Portfolio Returns')
ax.set_xlabel('Date')
ax.set_ylabel('Returns')
ax.grid(True)
plt.show()
5.3 Monthly returns¶
5.3 Long-Only¶
# Drop the first daily observation (presumably an artefact of the upstream
# return computation — TODO confirm against where daily_returns is built).
daily_returns = daily_returns.iloc[1:]
# Compound the daily returns within each calendar month into monthly returns.
monthly_returns = daily_returns.resample('M').apply(lambda r: (1 + r).prod() - 1)
monthly_returns.head()
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| 2018-09-30 | -0.003543 | 0.008228 | -0.002006 | -0.006172 | -0.004550 | 0.015995 |
| 2018-10-31 | -0.082113 | -0.070511 | -0.075950 | -0.083624 | -0.078524 | -0.066693 |
| 2018-11-30 | 0.024886 | 0.039180 | 0.021326 | 0.011756 | 0.019698 | 0.047711 |
| 2018-12-31 | -0.083202 | -0.075134 | -0.129404 | -0.090721 | -0.122200 | -0.083466 |
| 2019-01-31 | 0.103440 | 0.072108 | 0.122880 | 0.089383 | 0.126459 | 0.071022 |
# Monthly long-only minimum-variance weights, produced by the optimiser
# defined earlier in the file from the monthly return series.
long_only_min_variance_portfolio_weights = optimize_long_only_min_variance_portfolio(monthly_returns)
long_only_min_variance_portfolio_weights.head()
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| 2018-09-30 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 |
| 2018-10-31 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 |
| 2018-11-30 | 0.166665 | 0.141698 | 0.212719 | 0.166665 | 0.195608 | 0.116645 |
| 2018-12-31 | 0.166663 | 0.116725 | 0.237718 | 0.216661 | 0.195606 | 0.066626 |
| 2019-01-31 | 0.208338 | 0.116726 | 0.187742 | 0.249995 | 0.170571 | 0.066627 |
# Portfolio PnL: last month's weights applied to this month's returns
# (shift(1) prevents look-ahead), summed across factors and annualised by 12.
long_only_portfolio_returns = (
    long_only_min_variance_portfolio_weights.shift(1) * monthly_returns
).sum(axis=1) * 12
# Running arithmetic cumulative PnL.
monthly_pnl_long_only = long_only_portfolio_returns.cumsum()
monthly_pnl_long_only
2018-09-30 0.000000
2018-10-31 -0.914830
2018-11-30 -0.585717
2018-12-31 -1.795308
2019-01-31 -0.550894
...
2023-05-31 5.862406
2023-06-30 6.613881
2023-07-31 6.962976
2023-08-31 6.705498
2023-09-30 6.748303
Freq: M, Length: 61, dtype: float64
# Plot the monthly cumulative PnL of the long-only strategy.
ax = monthly_pnl_long_only.plot(figsize=(10, 6), title='Monthly Portfolio PnL')
ax.set_xlabel('Date')
ax.set_ylabel('PnL')
ax.grid(True)
plt.show()
5.3 Long-Short¶
We will perform the same monthly portfolio optimization, but this time we are permitted to take short positions and must ensure that the Gross Market Value (GMV) of the portfolio is precisely 1.
# Recompute monthly returns from the (trimmed) daily series, then run the
# long-short optimiser defined earlier in the file; per the text above, short
# positions are allowed and the gross market value must equal 1.
monthly_returns = daily_returns.resample('M').apply(lambda x: (1 + x).prod() - 1)
long_short_min_variance_portfolio_weights = optimize_long_short_min_variance_portfolio(monthly_returns)
long_short_min_variance_portfolio_weights
| Growth | Low Risk | Low Size | Momentum | Value | Quality Income | |
|---|---|---|---|---|---|---|
| 2018-09-30 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 | 0.166667 |
| 2018-10-31 | 0.141672 | 0.141672 | 0.141672 | 0.141672 | 0.141672 | 0.141672 |
| 2018-11-30 | 0.091683 | 0.091680 | 0.141672 | 0.141672 | 0.141672 | 0.091653 |
| 2018-12-31 | 0.041683 | 0.041680 | 0.141672 | 0.141672 | 0.141672 | 0.041653 |
| 2019-01-31 | 0.041683 | 0.041680 | 0.091699 | 0.141672 | 0.091671 | -0.008376 |
| ... | ... | ... | ... | ... | ... | ... |
| 2023-05-31 | 0.000692 | 0.002484 | -0.003848 | 0.002103 | -0.002845 | 0.005976 |
| 2023-06-30 | 0.000893 | 0.002422 | -0.003860 | 0.002131 | -0.002873 | 0.005766 |
| 2023-07-31 | 0.001031 | 0.002403 | -0.003883 | 0.002133 | -0.002899 | 0.005716 |
| 2023-08-31 | 0.001116 | 0.002416 | -0.003925 | 0.002149 | -0.002926 | 0.005717 |
| 2023-09-30 | 0.001155 | 0.002454 | -0.003973 | 0.002176 | -0.002958 | 0.005727 |
61 rows × 6 columns
# Long-short PnL: shift the weights one month (returns are realised in the
# following month), multiply by realised monthly returns, sum across factors,
# and annualise by 12.
long_short_portfolio_returns = (
    long_short_min_variance_portfolio_weights.shift(1) * monthly_returns
).sum(axis=1) * 12
# Cumulative PnL path of the long-short strategy.
monthly_pnl_long_short = long_short_portfolio_returns.cumsum()
monthly_pnl_long_short
2018-09-30 0.000000
2018-10-31 -0.914830
2018-11-30 -0.635074
2018-12-31 -1.483045
2019-01-31 -0.783892
...
2023-05-31 -0.527225
2023-06-30 -0.527232
2023-07-31 -0.527726
2023-08-31 -0.527783
2023-09-30 -0.527984
Freq: M, Length: 61, dtype: float64